import matplotlib.pyplot as plt
from sklearn import datasets
import pandas as pd
import pandas as pd
import numpy as np
import seaborn as sns
sns.set(color_codes=True)
import warnings
from sklearn.impute import SimpleImputer
warnings.filterwarnings("ignore")
# --- Load the input tables --------------------------------------------------
# The CSVs were originally fetched from Google Drive (file ids kept below for
# reference); they are now expected in the current working directory.
#   owid-covid-data.csv : 14aEtw3PE3kZ5fY1i5GrcKQKXVldYRGcS
#   govt_index.csv      : 13QYgFjQ3ztf_TxgK21CobYI3hCchAqFm
#   third download      : 1m1DH6MDIf_A-2FaDASROgiPfyZZmAYMQ
df = pd.read_csv('owid-covid-data.csv')
print(df.shape)

df_gov = pd.read_csv('govt_index.csv')
print(df_gov.shape)

# NOTE(review): this reads govt_index.csv a second time, so df_vaccine holds
# the same data as df_gov. Presumably a separate vaccination file was
# intended -- confirm the filename before relying on df_vaccine.
df_vaccine = pd.read_csv('govt_index.csv')
print(df_vaccine.shape)

# --- Missing-data overview --------------------------------------------------
print("total size of data is = ", df.shape)
print()
missing_values_count = df.isnull().sum()
print("Feature with highest number of missing data is ,")
print(missing_values_count.nlargest(n=50))
print()
# df.size is rows * columns. It replaces np.product(df.shape); np.product was
# removed in NumPy 2.0.
print("% data which is missing = ", missing_values_count.sum() / df.size * 100)

# Per-location means over the rows that report weekly ICU admissions.
temp_df = df[df['weekly_icu_admissions'].notna()]
print(temp_df.shape)
# numeric_only=True: text columns cannot be averaged, and pandas >= 2.0 raises
# instead of silently dropping them.
print(temp_df.groupby('location').mean(numeric_only=True))
# INDIA
# Extract the per-metric series for India. Missing counts are replaced by 0.
df_india = df[df['location'] == 'India']
df_india_total_cases = df_india['total_cases'].fillna(0)
df_india_total_cases_per_million = df_india['total_cases_per_million'].fillna(0)
df_india_new_cases = df_india['new_cases_smoothed'].fillna(0)
df_india_new_cases_per_million = df_india['new_cases_smoothed_per_million'].fillna(0)
df_india_total_death = df_india['total_deaths'].fillna(0)
df_india_total_deaths_per_million = df_india['total_deaths_per_million'].fillna(0)
# handle outlier here
df_india_new_death = df_india['new_deaths_smoothed'].fillna(0)
df_india_new_deaths_per_million = df_india['new_deaths_smoothed_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first reported value, then
# the final sample is overwritten with the last reported value.
x = df_india['reproduction_rate'].dropna()
val = x.iloc[0]
val1 = x.iloc[-1]
df_india_reproduction = df_india['reproduction_rate'].fillna(val)
df_india_reproduction.iloc[-1] = val1

# Stringency index: back-fill gaps, then use the column mean for any trailing
# gap that has no later value. Series.bfill() replaces the deprecated
# fillna(method='bfill').
_india_stringency_raw = df_india['stringency_index']
df_india_stringency = _india_stringency_raw.bfill().fillna(_india_stringency_raw.mean())
plt.plot(df_india_stringency)
# ISRAEL
# Extract the per-metric series for Israel. Missing counts are replaced by 0.
df_israel = df[df['location'] == 'Israel']
df_israel_total_cases = df_israel['total_cases'].fillna(0)

df_israel_total_cases_per_million = df_israel['total_cases_per_million'].fillna(0)
# Copy the second-to-last sample over the last one (presumably the final row
# is missing in the source data and would otherwise read as 0 -- confirm).
df_israel_total_cases_per_million.iloc[-1] = df_israel_total_cases_per_million.iloc[-2]

df_israel_new_cases = df_israel['new_cases_smoothed'].fillna(0)
# The original assigned the filled series to a misspelled name
# (df_israela_...), leaving this series with NaNs when it was plotted later.
df_israel_new_cases_per_million = df_israel['new_cases_smoothed_per_million'].fillna(0)
df_israel_total_death = df_israel['total_deaths'].fillna(0)

df_israel_total_deaths_per_million = df_israel['total_deaths_per_million'].fillna(0)
df_israel_total_deaths_per_million.iloc[-1] = df_israel_total_deaths_per_million.iloc[-2]

# handle outlier here
df_israel_new_death = df_israel['new_deaths_smoothed'].fillna(0)
df_israel_new_deaths_per_million = df_israel['new_deaths_smoothed_per_million'].fillna(0)

# Reproduction rate: back-fill gaps, then use the last reported value for any
# trailing gap. Series.bfill() replaces the deprecated fillna(method='bfill').
x = df_israel['reproduction_rate'].dropna()
val1 = x.iloc[-1]
df_israel_reproduction = df_israel['reproduction_rate'].bfill().fillna(val1)

# Stringency index: back-fill, then fall back to the column mean.
_israel_stringency_raw = df_israel['stringency_index']
df_israel_stringency = _israel_stringency_raw.bfill().fillna(_israel_stringency_raw.mean())
plt.plot(df_israel_stringency)
# USA
# Extract the per-metric series for the United States. Missing counts are
# replaced by 0.
df_usa = df[df['location'] == 'United States']
df_usa_total_cases = df_usa['total_cases'].fillna(0)
df_usa_total_cases_per_million = df_usa['total_cases_per_million'].fillna(0)
df_usa_new_cases = df_usa['new_cases_smoothed'].fillna(0)
df_usa_new_cases_per_million = df_usa['new_cases_smoothed_per_million'].fillna(0)
df_usa_total_death = df_usa['total_deaths'].fillna(0)
df_usa_total_deaths_per_million = df_usa['total_deaths_per_million'].fillna(0)
df_usa_new_death = df_usa['new_deaths_smoothed'].fillna(0)
df_usa_new_deaths_per_million = df_usa['new_deaths_smoothed_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first reported value, then
# the final sample is overwritten with the last reported value.
x = df_usa['reproduction_rate'].dropna()
val = x.iloc[0]
val1 = x.iloc[-1]
df_usa_reproduction = df_usa['reproduction_rate'].fillna(val)
df_usa_reproduction.iloc[-1] = val1

# Stringency index: back-fill gaps, then use the column mean for any trailing
# gap. Series.bfill() replaces the deprecated fillna(method='bfill').
_usa_stringency_raw = df_usa['stringency_index']
df_usa_stringency = _usa_stringency_raw.bfill().fillna(_usa_stringency_raw.mean())
plt.plot(df_usa_stringency)
# Italy
# Extract the per-metric series for Italy. Missing counts are replaced by 0.
df_italy = df[df['location'] == 'Italy']
df_italy_total_cases = df_italy['total_cases'].fillna(0)
df_italy_total_cases_per_million = df_italy['total_cases_per_million'].fillna(0)
df_italy_new_cases = df_italy['new_cases_smoothed'].fillna(0)
df_italy_new_cases_per_million = df_italy['new_cases_smoothed_per_million'].fillna(0)
df_italy_total_death = df_italy['total_deaths'].fillna(0)
df_italy_total_deaths_per_million = df_italy['total_deaths_per_million'].fillna(0)
# Negative daily-death values are clipped to 0 before the gaps are filled.
df_italy_new_death = df_italy['new_deaths_smoothed'].clip(lower=0).fillna(0)
df_italy_new_deaths_per_million = (
    df_italy['new_deaths_smoothed_per_million'].clip(lower=0).fillna(0)
)

# Reproduction rate: every gap is filled with the first reported value, then
# the final sample is overwritten with the last reported value.
x = df_italy['reproduction_rate'].dropna()
val = x.iloc[0]
val1 = x.iloc[-1]
df_italy_reproduction = df_italy['reproduction_rate'].fillna(val)
df_italy_reproduction.iloc[-1] = val1

# Stringency index: back-fill gaps, then use the column mean for any trailing
# gap. Series.bfill() replaces the deprecated fillna(method='bfill').
_italy_stringency_raw = df_italy['stringency_index']
df_italy_stringency = _italy_stringency_raw.bfill().fillna(_italy_stringency_raw.mean())
plt.plot(df_italy_stringency)
# New Zealand
# Extract the per-metric series for New Zealand. Missing counts are replaced
# by 0.
df_nz = df[df['location'] == 'New Zealand']
df_nz_total_cases = df_nz['total_cases'].fillna(0)
df_nz_total_cases_per_million = df_nz['total_cases_per_million'].fillna(0)
df_nz_new_cases = df_nz['new_cases_smoothed'].fillna(0)
df_nz_new_cases_per_million = df_nz['new_cases_smoothed_per_million'].fillna(0)
df_nz_total_death = df_nz['total_deaths'].fillna(0)
df_nz_total_deaths_per_million = df_nz['total_deaths_per_million'].fillna(0)
# Negative daily-death values are clipped to 0 after the gaps are filled.
df_nz_new_death = df_nz['new_deaths_smoothed'].fillna(0).clip(lower=0)
df_nz_new_deaths_per_million = df_nz['new_deaths_smoothed_per_million'].fillna(0)

# Reproduction rate: every gap is filled with the first reported value, then
# the final sample is overwritten with the last reported value.
x = df_nz['reproduction_rate'].dropna()
val = x.iloc[0]
val1 = x.iloc[-1]
df_nz_reproduction = df_nz['reproduction_rate'].fillna(val)
df_nz_reproduction.iloc[-1] = val1

# Stringency index: back-fill gaps, then use the column mean for any trailing
# gap. Series.bfill() replaces the deprecated fillna(method='bfill').
_nz_stringency_raw = df_nz['stringency_index']
df_nz_stringency = _nz_stringency_raw.bfill().fillna(_nz_stringency_raw.mean())
plt.plot(df_nz_stringency)
# WORLD
# Aggregate "World" rows of the dataset. Missing values are replaced by 0.
df_w = df[df['location'] == 'World']
df_w_total_cases = df_w['total_cases'].fillna(0)
df_w_total_deaths = df_w['total_deaths'].fillna(0)
df_w_total_tests = df_w['total_tests'].fillna(0)
df_w_new_cases = df_w['new_cases'].fillna(0)
# The original rebuilt this from df_w_new_cases, so the "smoothed" series was
# just a copy of the raw daily counts; use the smoothed column instead.
df_w_new_cases_smoothed = df_w['new_cases_smoothed'].fillna(0)
# Stringency Index
# Overlay the five countries' stringency curves on one new figure. The curves
# are plotted by position (to_numpy), not by the DataFrame row index.
plt.figure()
for _stringency, _tag in (
    (df_india_stringency, 'ind'),
    (df_israel_stringency, 'isr'),
    (df_usa_stringency, 'usa'),
    (df_italy_stringency, 'ita'),
    (df_nz_stringency, 'nz'),
):
    plt.plot(_stringency.to_numpy(), label=_tag)
plt.legend()
plt.grid()
import pycountry
import plotly.express as px
# Animated world map of total cases.
# df1 is an alias of df (not a copy), so the iso_alpha column added below also
# appears on df.
df1 = df
list_countries = df1['location'].unique().tolist()

# Look up an ISO 3166-1 alpha-3 code for every location name. The lookup is
# best effort: names that pycountry cannot resolve get ' ' as a placeholder.
d_country_code = {}
for country in list_countries:
    try:
        country_code = pycountry.countries.search_fuzzy(country)[0].alpha_3
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit. Exception keeps the lookup best-effort but interruptible.
        print('could not add ISO 3 code for ->', country)
        country_code = ' '
    d_country_code[country] = country_code

# One vectorised map replaces a full-frame boolean scan per country.
df1['iso_alpha'] = df1['location'].map(d_country_code)

fig = px.choropleth(
    df1,                               # input DataFrame
    locations="iso_alpha",             # country-code column
    color="total_cases",               # value shown as colour
    hover_name="location",             # hover label
    animation_frame="date",            # one animation frame per date
    projection="natural earth",        # map projection
    color_continuous_scale='Peach',    # colour scale
    range_color=[0, 40000000],         # fixed colour range
)
fig.show()
# Bar chart: total cases per million for the five countries (log y axis).
# "Isreal" in the original tick labels was a typo.
x = np.array(["India", "USA", "New Zealand", "Italy", "Israel"])
# The second-to-last sample of each series is used (presumably because the
# final row can be incomplete -- confirm).
y = [
    series.values[-2]
    for series in (
        df_india_total_cases_per_million,
        df_usa_total_cases_per_million,
        df_nz_total_cases_per_million,
        df_italy_total_cases_per_million,
        df_israel_total_cases_per_million,
    )
]

# The original called plt.figure(2) first; that only re-activated another
# figure before this one was created, so it is dropped.
plt.figure(figsize=(18, 10))
ax = plt.subplot(111)
r2 = ax.bar(x, y, width=0.25, color='b', align='center')
#plt.legend(['Total Cases per Million Population'], loc=0, fontsize=15)
# The `b=` keyword of grid() was removed in Matplotlib 3.7; the positional
# form works on every version.
plt.grid(True, alpha=0.8, linewidth=1)


def autolabel(rects):
    """Write each bar's height just above it on the module-level ``ax``.

    Args:
        rects: iterable of bar patches, e.g. the container from ``ax.bar``.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(
            rect.get_x() + rect.get_width() / 2,
            1 + height,
            '%0.1f' % float(height),
            ha='center',
            va='bottom',
        )


plt.title('Total Cases per Million Population(log)', fontsize=30)
plt.ylabel('Count per million', fontsize=20)
plt.xlabel('Country', fontsize=20)
autolabel(r2)
plt.yscale("log")
plt.show()
# Pearson and Spearman correlation of cases / tests / deaths over the whole
# dataset, each drawn as an annotated heatmap on its own figure.
_corr_columns = ['total_cases', 'total_tests', 'total_deaths']
for _method, _heading in (
    ('pearson', 'Pearson Correlation Heatmap'),
    ('spearman', 'Spearman Correlation Heatmap'),
):
    plt.figure(figsize=(6, 4))
    corr = df[_corr_columns].corr(method=_method)
    heatmap = sns.heatmap(corr, vmin=0, vmax=1, annot=True)
    heatmap.set_title(_heading, fontdict={'fontsize': 12}, pad=12)
# Tests-per-case ratio: bar chart (log y axis) and bubble scatter.
#
# NOTE(review): the df_*_total_tests series were used here without being
# defined anywhere in this script (NameError). They are built below from the
# 'total_tests' column: forward-filled so gaps carry the latest reported
# cumulative count, then 0 for rows before the first report. Confirm this
# matches the intended definition.
df_india_total_tests = df_india['total_tests'].ffill().fillna(0)
df_usa_total_tests = df_usa['total_tests'].ffill().fillna(0)
df_nz_total_tests = df_nz['total_tests'].ffill().fillna(0)
df_italy_total_tests = df_italy['total_tests'].ffill().fillna(0)
df_israel_total_tests = df_israel['total_tests'].ffill().fillna(0)

# (label, tests, cases, sample position). The negative positions are the
# original hand-picked offsets from the end of each series.
_ratio_samples = [
    ("India", df_india_total_tests, df_india_total_cases, -3),
    ("USA", df_usa_total_tests, df_usa_total_cases, -6),
    ("New Zealand", df_nz_total_tests, df_nz_total_cases, -3),
    ("Italy", df_italy_total_tests, df_italy_total_cases, -3),
    ("Israel", df_israel_total_tests, df_israel_total_cases, -4),
]
_labels = [label for label, _, _, _ in _ratio_samples]
_tests_at = [tests.values[pos] for _, tests, _, pos in _ratio_samples]
_cases_at = [cases.values[pos] for _, _, cases, pos in _ratio_samples]
# Divide only the sampled elements rather than whole arrays; this also avoids
# the divide-by-zero noise from early rows where total_cases is 0.
_ratios = [tests / cases for tests, cases in zip(_tests_at, _cases_at)]

x = np.array(_labels)
y = _ratios
# The original called plt.figure(2) first, which could open an extra empty
# figure; only the sized figure is created here.
plt.figure(figsize=(18, 10))
ax = plt.subplot(111)
r2 = ax.bar(x, y, width=0.25, color='b', align='center')
#plt.legend(['Total Cases per Million Population'], loc=0, fontsize=15)
# The `b=` keyword of grid() was removed in Matplotlib 3.7.
plt.grid(True, alpha=0.8, linewidth=1)


def autolabel(rects):
    """Write each bar's height just above it on the module-level ``ax``.

    Args:
        rects: iterable of bar patches, e.g. the container from ``ax.bar``.
    """
    for rect in rects:
        height = rect.get_height()
        ax.text(
            rect.get_x() + rect.get_width() / 2,
            1 + height,
            '%0.1f' % float(height),
            ha='center',
            va='bottom',
        )


plt.title('Test-Cases ratio', fontsize=30)
plt.ylabel('Ratio(log)', fontsize=30)
plt.xlabel('Country', fontsize=30)
plt.xticks(fontsize=20)
plt.yscale("log")
autolabel(r2)
plt.show()

import plotly.express as px

# Bubble chart: total tests vs total cases, bubble size/colour = ratio.
x = np.array(_tests_at)
y = np.array(_cases_at)
tcr = np.array(_ratios)
dat = pd.DataFrame(
    {
        'total_tests': x,
        'total_cases': y,
        'tcr': tcr,
        'location': _labels,
    }
)
fig = px.scatter(dat, x="total_tests", y="total_cases",
                 size="tcr", color="tcr",
                 hover_name="location", size_max=70, text="location")
fig.update_traces(textposition='top center')
fig.show()
# Google search interest vs. case counts (global).
search = pd.read_csv('searchCOVID.csv')

# Keep every row whose DataFrame index is a multiple of 7, i.e. one sample
# per seven rows. NOTE: the per-country total-case series are overwritten
# with their subsampled versions and stay that way for the rest of the script.
world = df_w_total_cases[df_w_total_cases.index % 7 == 0]
df_india_total_cases = df_india_total_cases[df_india_total_cases.index % 7 == 0]
df_usa_total_cases = df_usa_total_cases[df_usa_total_cases.index % 7 == 0]
df_nz_total_cases = df_nz_total_cases[df_nz_total_cases.index % 7 == 0]
df_israel_total_cases = df_israel_total_cases[df_israel_total_cases.index % 7 == 0]
df_italy_total_cases = df_italy_total_cases[df_italy_total_cases.index % 7 == 0]

_weeks = range(len(search['Week']))
plt.figure(figsize=(18, 10))
plt.plot(_weeks, search['covid'], color='r')
plt.plot(_weeks, search['vaccine'], color='g')
# Cases are rescaled to 0-100 (percentage of their maximum). This call needs
# `world` to have exactly as many samples as the search table has rows.
plt.plot(_weeks, world / max(world) * 100, color='b')
plt.legend(['Searches for COVID', 'Searches for its Vaccine', 'Total global cases'], loc=1, fontsize=15)
# The `b=` keyword of grid() was removed in Matplotlib 3.7.
plt.grid(True, alpha=0.8, linewidth=1)
plt.title('Google searches(Global)', fontsize=30)
plt.ylabel('Value', fontsize=20)
plt.xlabel('Week', fontsize=20)
# The original also called autolabel(r2) here; that annotated the earlier bar
# chart's axes and had nothing to do with this line plot, so it is removed.
plt.show()
# Google search interest vs. total cases, one figure per country.
# Each entry: (column prefix in `search`, title label, case series, end slice).
# Israel's case curve drops its last two samples, as in the original code;
# None keeps the whole series.
_search_panels = (
    ('india', 'India', df_india_total_cases, None),
    ('usa', 'USA', df_usa_total_cases, None),
    ('nz', 'New Zealand', df_nz_total_cases, None),
    ('italy', 'Italy', df_italy_total_cases, None),
    ('israel', 'Israel', df_israel_total_cases, -2),
)
_weeks = range(len(search['Week']))
for _prefix, _country, _cases, _stop in _search_panels:
    plt.figure(figsize=(18, 10))
    plt.plot(_weeks, search[_prefix + '_c'], color='r')
    plt.plot(_weeks, search[_prefix + '_v'], color='g')
    # Cases are rescaled to 0-100 (percentage of the series maximum) and
    # plotted by position.
    plt.plot((_cases / max(_cases) * 100).values[:_stop], color='b')
    plt.legend(['Searches for COVID', 'Searches for its Vaccine', 'Total cases'], loc=1, fontsize=15)
    # The `b=` keyword of grid() was removed in Matplotlib 3.7.
    plt.grid(True, alpha=0.8, linewidth=1)
    plt.title('Google searches(' + _country + ')', fontsize=30)
    plt.ylabel('Value', fontsize=20)
    plt.xlabel('Week', fontsize=20)
    plt.show()
import math
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
import statsmodels.tsa.api as smt
import statsmodels.formula.api as smf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
def model(treand, title='World COVID cases'):
    """Fit an MLP regressor on sliding windows of a series and plot the fit.

    The series is min-max scaled, cut into windows of three consecutive
    values with the following value as target, and an ``MLPRegressor`` is
    trained and evaluated on the same windows (an in-sample fit, no hold-out).
    The prediction and the original targets are plotted on the original scale.

    Args:
        treand: pandas Series of values to model. It must contain more than
            four samples, otherwise no window can be built.
        title: plot title. Defaults to the title that was hard-coded before.

    Returns:
        numpy array of shape ``(n_windows, 1)`` with the in-sample predictions
        on the original scale, where
        ``n_windows = len(treand) - look_back - 1``.
    """
    from sklearn.neural_network import MLPRegressor

    scaler = MinMaxScaler()
    treand_s = scaler.fit_transform(treand.values.reshape(-1, 1))

    def create_dataset(dataset, look_back=3):
        """Return (windows, targets) built from consecutive samples."""
        dataX, dataY = [], []
        # The extra "- 1" means the final sample is never used as a target.
        for i in range(len(dataset) - look_back - 1):
            dataX.append(dataset[i:(i + look_back)])
            dataY.append(dataset[i + look_back])
        return np.array(dataX), np.array(dataY)

    x, y = create_dataset(treand_s)
    # (n, look_back, 1) -> (n, look_back)
    x_train = x.reshape(x.shape[0], x.shape[1])
    y_train = y

    clf = MLPRegressor(activation='tanh', solver='adam')
    # ravel(): the estimator expects a 1-D target; a column vector triggers a
    # DataConversionWarning and is flattened internally anyway.
    clf.fit(x_train, y_train.ravel())
    train_pred = clf.predict(x_train)

    plt.figure(figsize=(18, 10))
    plt.plot(scaler.inverse_transform(train_pred.reshape(-1, 1)), label='Prediction')
    plt.plot(scaler.inverse_transform(y_train.reshape(-1, 1)), color='red', label='Original')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title(title)
    plt.show()
    return scaler.inverse_transform(train_pred.reshape(-1, 1))


t1 = model(df_w_total_cases)
from sklearn.preprocessing import MinMaxScaler
def model(treand, title='New Cases Globally'):
    """Fit an MLP regressor on sliding windows of a series and plot the fit.

    This definition replaces the earlier ``model`` in the module namespace.
    The series is min-max scaled, cut into windows of three consecutive
    values with the following value as target, and an ``MLPRegressor`` is
    trained and evaluated on the same windows (an in-sample fit, no hold-out).
    The prediction and the original targets are plotted on the original scale.

    Args:
        treand: pandas Series of values to model. It must contain more than
            four samples, otherwise no window can be built.
        title: plot title. Defaults to the title that was hard-coded before.

    Returns:
        numpy array of shape ``(n_windows, 1)`` with the predictions on the
        original scale, where ``n_windows = len(treand) - look_back - 1``.
    """
    from sklearn.neural_network import MLPRegressor

    scaler = MinMaxScaler()
    treand_s = scaler.fit_transform(treand.values.reshape(-1, 1))

    def create_dataset(dataset, look_back=3):
        """Return (windows, targets) built from consecutive samples."""
        dataX, dataY = [], []
        # The extra "- 1" means the final sample is never used as a target.
        for i in range(len(dataset) - look_back - 1):
            dataX.append(dataset[i:(i + look_back)])
            dataY.append(dataset[i + look_back])
        return np.array(dataX), np.array(dataY)

    x, y = create_dataset(treand_s)
    # (n, look_back, 1) -> (n, look_back)
    x = x.reshape(x.shape[0], x.shape[1])
    # The "test" set is the training set itself: no data is held out.
    x_train, y_train = x, y
    x_test, y_test = x, y

    clf = MLPRegressor(activation='tanh', solver='adam')
    # ravel(): the estimator expects a 1-D target; a column vector triggers a
    # DataConversionWarning and is flattened internally anyway.
    clf.fit(x_train, y_train.ravel())
    train_pred = clf.predict(x_test)

    plt.figure(figsize=(18, 10))
    plt.plot(scaler.inverse_transform(train_pred.reshape(-1, 1)), label='Prediction')
    plt.plot(scaler.inverse_transform(y_train.reshape(-1, 1)), color='red', label='Original')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.title(title)
    plt.show()
    return scaler.inverse_transform(train_pred.reshape(-1, 1))


# The series passed in is India's total cases, so the plot is titled
# accordingly rather than with the "New Cases Globally" default.
t1 = model(df_india_total_cases, title='Total COVID cases (India)')
# Min-max scale India's total-case series as a single feature column.
treand = df_india_total_cases
scaler = MinMaxScaler()
_treand_column = treand.values.reshape(-1, 1)
treand_s = scaler.fit_transform(_treand_column)
def create_dataset(dataset, look_back=3):
    """Cut a sequence into sliding windows and their next-step targets.

    Window ``s`` is ``dataset[s:s + look_back]`` and its target is
    ``dataset[s + look_back]``. ``len(dataset) - look_back - 1`` windows are
    produced, so the final sample is never used as a target.

    Args:
        dataset: indexable, sliceable sequence (e.g. a numpy array).
        look_back: number of consecutive samples per window.

    Returns:
        Tuple ``(windows, targets)`` of numpy arrays; both are empty when the
        input is too short to build a window.
    """
    starts = range(len(dataset) - look_back - 1)
    windows = [dataset[s:s + look_back] for s in starts]
    targets = [dataset[s + look_back] for s in starts]
    return np.array(windows), np.array(targets)
# Build the sliding-window samples from the scaled series and drop the
# trailing singleton feature axis: (n, look_back, 1) -> (n, look_back).
x, y = create_dataset(treand_s)
x = x.reshape(x.shape[:2])
# All samples are used for training; nothing is held out.
_n_samples = len(x)
x_train = x[:_n_samples]
y_train = y[:_n_samples]
def _plot_country_comparison(series_by_label, ylabel, title, xlabel='Days'):
    """Plot one line per country on a new 8x5 figure and show it.

    Args:
        series_by_label: mapping of legend label -> pandas Series. Lines are
            drawn in mapping order and by position (``to_numpy``), not by the
            Series index.
        ylabel: y-axis label.
        title: figure title.
        xlabel: x-axis label.
    """
    plt.figure(figsize=(8, 5))
    for label, series in series_by_label.items():
        plt.plot(series.to_numpy(), label=label)
    plt.grid()
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.title(title)
    plt.show()


# Total cases per million
_plot_country_comparison(
    {
        'India': df_india_total_cases_per_million,
        'Israel': df_israel_total_cases_per_million,
        'USA': df_usa_total_cases_per_million,
        'Italy': df_italy_total_cases_per_million,
        'New Zealand': df_nz_total_cases_per_million,
    },
    ylabel='Cases per million population',
    title='Total cases per million population',
)

# New cases per million per day
_plot_country_comparison(
    {
        'India': df_india_new_cases_per_million,
        'Israel': df_israel_new_cases_per_million,
        'USA': df_usa_new_cases_per_million,
        'Italy': df_italy_new_cases_per_million,
        'New Zealand': df_nz_new_cases_per_million,
    },
    ylabel='Cases per million population',
    title='New cases per million population',
)
# ISRAEL HAS HERD IMMUNITY  (author's reading of the plot above)

# Total cases
# The *_total_cases series were reduced to every 7th row earlier in the
# script, so each point here is one week, not one day; the original labelled
# this axis 'Days'.
_plot_country_comparison(
    {
        'India': df_india_total_cases,
        'Israel': df_israel_total_cases,
        'USA': df_usa_total_cases,
        'Italy': df_italy_total_cases,
        'New Zealand': df_nz_total_cases,
    },
    ylabel='Total Cases ',
    title='Total cases',
    xlabel='Weeks',
)

# New cases per day
_plot_country_comparison(
    {
        'India': df_india_new_cases,
        'Israel': df_israel_new_cases,
        'USA': df_usa_new_cases,
        'Italy': df_italy_new_cases,
        'New Zealand': df_nz_new_cases,
    },
    ylabel='New Cases ',
    title='New cases',
)

# Total deaths per million
_plot_country_comparison(
    {
        'India': df_india_total_deaths_per_million,
        'Israel': df_israel_total_deaths_per_million,
        'USA': df_usa_total_deaths_per_million,
        'Italy': df_italy_total_deaths_per_million,
        'New Zealand': df_nz_total_deaths_per_million,
    },
    ylabel='Deaths per million population',
    title='Total Deaths per million population',
)
print(df_nz_total_deaths_per_million.shape)
print(df_usa_total_deaths_per_million.shape)

# New deaths per million
_plot_country_comparison(
    {
        'India': df_india_new_deaths_per_million,
        'Israel': df_israel_new_deaths_per_million,
        'USA': df_usa_new_deaths_per_million,
        'Italy': df_italy_new_deaths_per_million,
        'New Zealand': df_nz_new_deaths_per_million,
    },
    ylabel='Deaths per million population',
    title='New Deaths per million population',
)

# New deaths
_plot_country_comparison(
    {
        'India': df_india_new_death,
        'Israel': df_israel_new_death,
        'USA': df_usa_new_death,
        'Italy': df_italy_new_death,
        'New Zealand': df_nz_new_death,
    },
    ylabel='Deaths ',
    title='New Deaths ',
)

# Total deaths
_plot_country_comparison(
    {
        'India': df_india_total_death,
        'Israel': df_israel_total_death,
        'USA': df_usa_total_death,
        'Italy': df_italy_total_death,
        'New Zealand': df_nz_total_death,
    },
    ylabel='Deaths ',
    title='Total Deaths ',
)
# Country wise stringency index analysis
# One twin-axis figure per country: new cases rescaled to 0-100 on the left
# axis, stringency index on the right axis. Each entry is
# (country label, new-case series, stringency series,
#  legend loc for cases, legend loc for the index).
_stringency_panels = (
    ('India', df_india_new_cases, df_india_stringency, 2, 1),
    ('Israel', df_israel_new_cases, df_israel_stringency, 1, 2),
    ('New Zealand', df_nz_new_cases, df_nz_stringency, 1, 2),
    ('USA', df_usa_new_cases, df_usa_stringency, 1, 2),
    ('Italy', df_italy_new_cases, df_italy_stringency, 1, 2),
)
for _country, _new_cases, _stringency, _cases_loc, _index_loc in _stringency_panels:
    fig, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    _normalised_cases = _new_cases.to_numpy() * 100 / max(_new_cases)
    ax1.plot(_normalised_cases, label='Cases', color='r')
    ax2.plot(_stringency.to_numpy(), label='Stringency Index')
    ax1.set_title('Stringency index analysis for ' + _country)
    # plt.xlabel / plt.grid act on the current axes, which is the twin (ax2).
    plt.xlabel('Days')
    ax1.set_ylabel('Normalized Cases')
    ax2.set_ylabel('Stringency Index')
    ax1.legend(loc=_cases_loc)
    ax2.legend(loc=_index_loc)
    plt.grid()
    plt.show()
    print()
# Government response measures for the five countries of interest.
_policy_columns = [
    'C1_School closing',
    'C2_Workplace closing',
    'C3_Cancel public events',
    'C4_Restrictions on gatherings',
    'C5_Close public transport',
    'C6_Stay at home requirements',
    'C7_Restrictions on internal movement',
    'C8_International travel controls',
]
df_world_index = df_gov[df_gov['CountryName'].isin(['India', 'Israel', 'United States', 'New Zealand', 'Italy'])]
# Keep only rows without a region name (presumably the national-level
# records -- confirm against the dataset's documentation).
df_world_index = df_world_index[df_world_index['RegionName'].isnull()]
df_world_index = df_world_index[['CountryName', 'Date'] + _policy_columns]

# /////////////////////
# Initial State
# Dates are compared as YYYYMMDD integers, both bounds exclusive. Explicit
# comparisons replace between(..., inclusive=False): a boolean `inclusive`
# is rejected by pandas >= 2.0.
_dates = df_world_index['Date']
df_temp = df_world_index[(_dates > 20200215) & (_dates < 20200305)]
df_temp = df_temp.groupby('CountryName').mean()
print(df_world_index.shape)
print(df_temp.shape)
plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(df_temp.drop(['Date'], axis=1), annot=True)
heatmap.set_title('Heatmap', fontdict={'fontsize': 18})
plt.show()
print()
# /////////////////////
# Mid-pandemic state (the original comment read "Medieval State"): mean of
# each measure per country between 2020-12-15 and 2021-01-15.
# Dates are compared as YYYYMMDD integers, both bounds exclusive. Explicit
# comparisons replace between(..., inclusive=False): a boolean `inclusive`
# is rejected by pandas >= 2.0.
_dates = df_world_index['Date']
df_temp = df_world_index[(_dates > 20201215) & (_dates < 20210115)]
df_temp = df_temp.groupby('CountryName').mean()
print(df_world_index.shape)
print(df_temp.shape)
plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(df_temp.drop(['Date'], axis=1), annot=True)
heatmap.set_title('Heatmap', fontdict={'fontsize': 18})
plt.show()
# /////////////////////
# Feature Analysis
# Spearman correlation between India's government measures. CountryName is
# dropped together with Date: it is a text column, and DataFrame.corr() in
# pandas >= 2.0 raises on non-numeric data instead of skipping it.
df_temp = df_world_index[df_world_index['CountryName'] == 'India'].drop(['Date', 'CountryName'], axis=1)
corr_matrix = df_temp.corr(method='spearman')
plt.figure(figsize=(16, 6))
heatmap = sns.heatmap(corr_matrix, vmin=-1, vmax=1, annot=True)
heatmap.set_title('Spearman Correlation Matrix Heatmap', fontdict={'fontsize': 18})
plt.show()
# Time course of each government measure for India, one line per measure.
plt.figure(figsize=(8, 5))
df_world_index1 = df_world_index[df_world_index['CountryName'] == 'India']
for _measure_column in (
    'C1_School closing',
    'C2_Workplace closing',
    'C3_Cancel public events',
    'C4_Restrictions on gatherings',
    'C5_Close public transport',
    'C6_Stay at home requirements',
    'C7_Restrictions on internal movement',
    'C8_International travel controls',
):
    # The legend label is the column name without its "C<n>_" prefix.
    _measure_label = _measure_column.split('_', 1)[1]
    plt.plot(df_world_index1[_measure_column].to_numpy(), label=_measure_label)
plt.grid()
plt.xlabel('Days')
plt.ylabel('Index ')
# Legend sits outside the axes, to the right.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.title('Governmental Measures ')
plt.show()
# Reproduction Number correlation with new cases
# Spearman correlation between India's reproduction rate and new cases, then
# both curves on a twin-axis figure.
_reproduction_and_cases = [df_india_reproduction, df_india_new_cases]
df_temp = pd.concat(_reproduction_and_cases, axis=1)
corr_matrix = df_temp.corr(method='spearman')
print(corr_matrix)
print()

fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
# Cases are divided by their maximum, so the left axis runs from 0 to 1.
_normalised_cases = df_india_new_cases.to_numpy() / max(df_india_new_cases)
ax1.plot(_normalised_cases, label='Cases', color='r')
ax2.plot(df_india_reproduction.to_numpy(), label='Reproduction Number')
ax1.set_title('Reproduction Number analysis for India')
# plt.xlabel / plt.grid act on the current axes, which is the twin (ax2).
plt.xlabel('Days')
ax1.set_ylabel('Normalized Cases')
ax2.set_ylabel('Reproduction Number')
ax1.legend(loc=3)
ax2.legend(loc=1)
plt.grid()
plt.show()
print()